import os
import xai
import logging as log
import warnings
import matplotlib.pyplot as plt
from matplotlib import cm
import sys, os
from util.commons import *
from util.ui import *
from util.model import *
from util.split import *
from util.dataset import *
from IPython.display import display, HTML
For this example we are going to use the 'Adult Census Dataset', which consists of both categorical and numerical features.
# Load the 'census' (Adult census) dataset through the project helper.
# `msg` is a human-readable status string; `dataset.df` holds the raw frame.
dataset, msg = get_dataset('census')
display(msg)
display(dataset.df)
"Dataset 'census (Adult census dataset)' loaded successfully. For further information about this dataset please visit: https://ethicalml.github.io/xai/index.html?highlight=load_census#xai.data.load_census"
| age | workclass | education | education-num | marital-status | occupation | relationship | ethnicity | gender | capital-gain | capital-loss | hours-per-week | loan | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 39 | State-gov | Bachelors | 13 | Never-married | Adm-clerical | Not-in-family | White | Male | 2174 | 0 | 40 | <=50K |
| 1 | 50 | Self-emp-not-inc | Bachelors | 13 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0 | 0 | 13 | <=50K |
| 2 | 38 | Private | HS-grad | 9 | Divorced | Handlers-cleaners | Not-in-family | White | Male | 0 | 0 | 40 | <=50K |
| 3 | 53 | Private | 11th | 7 | Married-civ-spouse | Handlers-cleaners | Husband | Black | Male | 0 | 0 | 40 | <=50K |
| 4 | 28 | Private | Bachelors | 13 | Married-civ-spouse | Prof-specialty | Wife | Black | Female | 0 | 0 | 40 | <=50K |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 32556 | 27 | Private | Assoc-acdm | 12 | Married-civ-spouse | Tech-support | Wife | White | Female | 0 | 0 | 38 | <=50K |
| 32557 | 40 | Private | HS-grad | 9 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0 | 0 | 40 | >50K |
| 32558 | 58 | Private | HS-grad | 9 | Widowed | Adm-clerical | Unmarried | White | Female | 0 | 0 | 40 | <=50K |
| 32559 | 22 | Private | HS-grad | 9 | Never-married | Adm-clerical | Own-child | White | Male | 0 | 0 | 20 | <=50K |
| 32560 | 52 | Self-emp-inc | HS-grad | 9 | Married-civ-spouse | Exec-managerial | Wife | White | Female | 15024 | 0 | 40 | >50K |
32561 rows × 13 columns
There are values in the dataset that are unknown (marked with the placeholder ' ?'). In this step all rows containing such values are going to be removed.
dataset.df['workclass'].unique()
array([' State-gov', ' Self-emp-not-inc', ' Private', ' Federal-gov',
' Local-gov', ' ?', ' Self-emp-inc', ' Without-pay',
' Never-worked'], dtype=object)
dataset.df.loc[dataset.df['workclass'] == ' ?']
| age | workclass | education | education-num | marital-status | occupation | relationship | ethnicity | gender | capital-gain | capital-loss | hours-per-week | loan | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 27 | 54 | ? | Some-college | 10 | Married-civ-spouse | ? | Husband | Asian-Pac-Islander | Male | 0 | 0 | 60 | >50K |
| 61 | 32 | ? | 7th-8th | 4 | Married-spouse-absent | ? | Not-in-family | White | Male | 0 | 0 | 40 | <=50K |
| 69 | 25 | ? | Some-college | 10 | Never-married | ? | Own-child | White | Male | 0 | 0 | 40 | <=50K |
| 77 | 67 | ? | 10th | 6 | Married-civ-spouse | ? | Husband | White | Male | 0 | 0 | 2 | <=50K |
| 106 | 17 | ? | 10th | 6 | Never-married | ? | Own-child | White | Female | 34095 | 0 | 32 | <=50K |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 32530 | 35 | ? | Bachelors | 13 | Married-civ-spouse | ? | Wife | White | Female | 0 | 0 | 55 | >50K |
| 32531 | 30 | ? | Bachelors | 13 | Never-married | ? | Not-in-family | Asian-Pac-Islander | Female | 0 | 0 | 99 | <=50K |
| 32539 | 71 | ? | Doctorate | 16 | Married-civ-spouse | ? | Husband | White | Male | 0 | 0 | 10 | >50K |
| 32541 | 41 | ? | HS-grad | 9 | Separated | ? | Not-in-family | Black | Female | 0 | 0 | 32 | <=50K |
| 32542 | 72 | ? | HS-grad | 9 | Married-civ-spouse | ? | Husband | White | Male | 0 | 0 | 25 | <=50K |
1836 rows × 13 columns
# Drop every row containing the ' ?' placeholder, then re-check the categories
# to confirm the marker is gone.
dataset.df = remove_undefined_rows(' ?', dataset.df)
dataset.df['workclass'].unique()
array([' State-gov', ' Self-emp-not-inc', ' Private', ' Federal-gov',
' Local-gov', ' Self-emp-inc', ' Without-pay'], dtype=object)
Three visualization functions offered by the XAI module will be used for analyzing the dataset.
# Dataset analysis with the XAI module: imbalance and correlation plots.
%matplotlib inline
plt.style.use('ggplot')
warnings.filterwarnings('ignore')
# Columns whose class balance we want to visualize.
imbalanced_cols = ['gender', 'ethnicity']
xai.imbalance_plot(dataset.df, *imbalanced_cols)
# Correlations including categorical columns: once as a matrix, once with the default plot type.
_ = xai.correlations(dataset.df, include_categorical=True, plot_type="matrix", plt_kwargs={'figsize': (6, 6)})
_ = xai.correlations(dataset.df, include_categorical=True, plt_kwargs={'figsize': (8, 6)})
13-May-23 12:00:26 - No categorical_cols passed so inferred using np.object, np.int8 and np.bool: Index(['workclass', 'education', 'marital-status', 'occupation',
'relationship', 'ethnicity', 'gender', 'loan'],
dtype='object'). If you see an error these are not correct, please provide them as a string array as: categorical_cols=['col1', 'col2', ...]
In the cell below the target variable is selected. In this example we will use the column loan as target variable, which shows whether a person earns more than 50k (>50K | <=50K) per year.
# Split features from the target: 'loan' (<=50K vs >50K yearly income) is the label.
df_X, df_y, msg = split_feature_target(dataset.df, "loan")
df_y
13-May-23 12:00:27 - Target 'loan' selected successfully.
0 <=50K
1 <=50K
2 <=50K
3 <=50K
4 <=50K
...
30713 <=50K
30714 >50K
30715 <=50K
30716 <=50K
30717 >50K
Name: loan, Length: 30718, dtype: object
Four models are going to be trained on this dataset. In the output below we can see accuracy, classification reports, confusion matrix and ROC Curve for each model.
# Build four untrained model shells for this dataset, then train the first one
# (logistic regression) on the standard (normal) train/test split.
initial_models, msg = fill_empty_models(df_X, df_y, 4)
models = []
logreg_candidate = initial_models[0]
msg = fill_model(logreg_candidate, Algorithm.LOGISTIC_REGRESSION, Split(SplitTypes.NORMAL))
models.append(logreg_candidate)
model1 = logreg_candidate          # name used inside this cell in the original
model_1 = models[0]                # name used by the later analysis cells
13-May-23 12:00:27 - Value of RANDOM_NUMBER is set to 72
Fitting 5 folds for each of 1 candidates, totalling 5 fits
13-May-23 12:00:28 - Best parameters: {'model__C': 1.0, 'model__penalty': 'l2', 'model__solver': 'newton-cg'}
13-May-23 12:00:29 - Model accuracy: 0.8529730902777778
13-May-23 12:00:29 - Classification report:
precision recall f1-score support
<=50K 0.88 0.93 0.91 6921
>50K 0.75 0.61 0.67 2295
accuracy 0.85 9216
macro avg 0.82 0.77 0.79 9216
weighted avg 0.85 0.85 0.85 9216
13-May-23 12:00:29 - Model Model 1 trained successfully!
# Train the second model: a decision tree on the standard (normal) split.
tree_candidate = initial_models[1]
msg = fill_model(tree_candidate, Algorithm.DECISION_TREE, Split(SplitTypes.NORMAL))
models.append(tree_candidate)
model2 = tree_candidate            # name used inside this cell in the original
model_2 = models[1]                # name used by the later analysis cells
Fitting 5 folds for each of 1 candidates, totalling 5 fits
13-May-23 12:00:30 - Best parameters: {'model__criterion': 'gini', 'model__max_depth': 8, 'model__max_features': 1.0, 'model__splitter': 'best'}
13-May-23 12:00:30 - Model accuracy: 0.8579644097222222
13-May-23 12:00:30 - Classification report:
precision recall f1-score support
<=50K 0.88 0.95 0.91 6921
>50K 0.78 0.59 0.68 2295
accuracy 0.86 9216
macro avg 0.83 0.77 0.79 9216
weighted avg 0.85 0.86 0.85 9216
13-May-23 12:00:30 - Model Model 2 trained successfully!
# Train the third model: a random forest on the standard (normal) split.
forest_candidate = initial_models[2]
msg = fill_model(forest_candidate, Algorithm.RANDOM_FOREST, Split(SplitTypes.NORMAL))
models.append(forest_candidate)
model3 = forest_candidate          # name used inside this cell in the original
model_3 = models[2]                # name used by the later analysis cells
Fitting 5 folds for each of 1 candidates, totalling 5 fits
13-May-23 12:03:22 - Best parameters: {'model__n_estimators': 4500, 'model__min_samples_split': 5, 'model__min_samples_leaf': 4, 'model__max_features': 'sqrt', 'model__max_depth': 57, 'model__criterion': 'gini', 'model__bootstrap': False}
13-May-23 12:03:25 - Model accuracy: 0.86328125
13-May-23 12:03:25 - Classification report:
precision recall f1-score support
<=50K 0.88 0.94 0.91 6921
>50K 0.78 0.62 0.69 2295
accuracy 0.86 9216
macro avg 0.83 0.78 0.80 9216
weighted avg 0.86 0.86 0.86 9216
13-May-23 12:03:36 - Model Model 3 trained successfully!
# Train the fourth model: a support-vector classifier on the standard (normal) split.
svc_candidate = initial_models[3]
msg = fill_model(svc_candidate, Algorithm.SVC, Split(SplitTypes.NORMAL))
models.append(svc_candidate)
model4 = svc_candidate             # name used inside this cell in the original
model_4 = models[3]                # name used by the later analysis cells
Fitting 5 folds for each of 42 candidates, totalling 210 fits
13-May-23 13:05:57 - Best parameters: {'model__gamma': 0.1, 'model__C': 1.0}
13-May-23 13:06:00 - Model accuracy: 0.8599175347222222
13-May-23 13:06:00 - Classification report:
precision recall f1-score support
<=50K 0.88 0.95 0.91 6921
>50K 0.79 0.60 0.68 2295
accuracy 0.86 9216
macro avg 0.83 0.77 0.80 9216
weighted avg 0.85 0.86 0.85 9216
13-May-23 13:06:09 - Model Model 4 trained successfully!
In the following steps we will use global interpretation techniques that help us answer questions such as: how does a model behave in general? Which features drive predictions, and which features are completely useless? This information may be very important in understanding the model better. Most of the techniques work by investigating the conditional interactions between the target variable and the features on the complete dataset.
The importance of a feature is the increase in the prediction error of the model after we permuted the feature’s values, which breaks the relationship between the feature and the true outcome. A feature is “important” if permuting it increases the model error. This is because in that case, the model relied heavily on this feature for making right prediction. On the other hand, a feature is “unimportant” if permuting it doesn’t affect the error by much or doesn’t change it at all.
In the first case, we use ELI5, which does not permute the features but only visualizes the weight of each feature.
# ELI5 feature importance for Model 1: per the note above, ELI5 visualizes the
# model's learned weights rather than permutation importance.
plot = generate_feature_importance_plot(FeatureImportanceType.ELI5, model_1)
display(plot)
13-May-23 13:06:09 - Generating a feature importance plot using ELI5 for Model 1 ...
y= >50K top features
| Weight? | Feature |
|---|---|
| +2.339 | capital-gain |
| +1.440 | marital-status_ Married-AF-spouse |
| +1.337 | marital-status_ Married-civ-spouse |
| +1.273 | relationship_ Wife |
| +0.928 | occupation_ Exec-managerial |
| +0.866 | gender_ Male |
| +0.810 | occupation_ Protective-serv |
| +0.722 | occupation_ Tech-support |
| +0.720 | education-num |
| +0.647 | occupation_ Prof-specialty |
| +0.619 | workclass_ Federal-gov |
| +0.569 | education_ 1st-4th |
| … 24 more positive … | |
| … 18 more negative … | |
| -0.616 | marital-status_ Married-spouse-absent |
| -0.634 | occupation_ Other-service |
| -0.892 | marital-status_ Never-married |
| -0.922 | occupation_ Farming-fishing |
| -1.038 | relationship_ Own-child |
| -1.121 | education_ Preschool |
| -1.482 | occupation_ Priv-house-serv |
| -3.285 | <BIAS> |
# ELI5 feature importance for Model 2 (decision tree).
plot = generate_feature_importance_plot(FeatureImportanceType.ELI5, model_2)
display(plot)
13-May-23 13:06:09 - Generating a feature importance plot using ELI5 for Model 2 ...
| Weight | Feature |
|---|---|
| 0.4184 | marital-status_ Married-civ-spouse |
| 0.2093 | education-num |
| 0.2002 | capital-gain |
| 0.0661 | capital-loss |
| 0.0492 | age |
| 0.0270 | hours-per-week |
| 0.0115 | occupation_ Exec-managerial |
| 0.0046 | gender_ Male |
| 0.0031 | relationship_ Wife |
| 0.0025 | education_ HS-grad |
| 0.0008 | workclass_ Self-emp-inc |
| 0.0008 | occupation_ Transport-moving |
| 0.0007 | workclass_ Local-gov |
| 0.0007 | occupation_ Adm-clerical |
| 0.0006 | occupation_ Prof-specialty |
| 0.0006 | workclass_ Private |
| 0.0005 | education_ Preschool |
| 0.0005 | occupation_ Craft-repair |
| 0.0005 | occupation_ Handlers-cleaners |
| 0.0005 | marital-status_ Married-AF-spouse |
| … 41 more … | |
# ELI5 feature importance for Model 3 (random forest).
plot = generate_feature_importance_plot(FeatureImportanceType.ELI5, model_3)
display(plot)
13-May-23 13:06:09 - Generating a feature importance plot using ELI5 for Model 3 ...
| Weight | Feature |
|---|---|
| 0.1645 ± 0.1081 | capital-gain |
| 0.1355 ± 0.2651 | marital-status_ Married-civ-spouse |
| 0.1081 ± 0.1322 | education-num |
| 0.0900 ± 0.2219 | relationship_ Husband |
| 0.0864 ± 0.0886 | age |
| 0.0529 ± 0.0593 | hours-per-week |
| 0.0437 ± 0.1322 | marital-status_ Never-married |
| 0.0403 ± 0.0360 | capital-loss |
| 0.0286 ± 0.0459 | occupation_ Exec-managerial |
| 0.0218 ± 0.0409 | occupation_ Prof-specialty |
| 0.0191 ± 0.0389 | education_ Bachelors |
| 0.0185 ± 0.0544 | relationship_ Not-in-family |
| 0.0180 ± 0.0535 | gender_ Male |
| 0.0155 ± 0.0383 | relationship_ Wife |
| 0.0143 ± 0.0579 | relationship_ Own-child |
| 0.0141 ± 0.0334 | education_ Masters |
| 0.0098 ± 0.0183 | occupation_ Other-service |
| 0.0091 ± 0.0201 | education_ HS-grad |
| 0.0083 ± 0.0307 | marital-status_ Divorced |
| 0.0075 ± 0.0276 | relationship_ Unmarried |
| … 41 more … | |
# This cell is intentionally skipped — presumably because ELI5 weight extraction
# is not useful/supported for the SVC (Model 4); confirm with the project docs.
# The original used "%%script false --no-raise-error", which fails on systems
# without a `false` executable (the run log shows "Couldn't find program: 'false'"
# on Windows). A plain-Python guard skips the cell portably instead.
RUN_ELI5_FOR_MODEL_4 = False
if RUN_ELI5_FOR_MODEL_4:
    plot = generate_feature_importance_plot(FeatureImportanceType.ELI5, model_4)
    display(plot)
Couldn't find program: 'false'
# RBO similarity between the ELI5 importance rankings of the model pairs,
# for list depths d = 5..20 (step 1).
# NOTE(review): the result only contains Model 1-3 pairs — presumably the helper
# skips models without an ELI5 ranking (the SVC); confirm.
rbos_eli5 = calculate_feature_importance_rbo(FeatureImportanceType.ELI5, models, 5, 20, 1)
styler = (rbos_eli5.style
          # Pass colormaps by name: cm.get_cmap() is deprecated since
          # matplotlib 3.7 and removed in 3.9.
          .background_gradient(
              subset=['Model 1_Model 2', 'Model 1_Model 3', 'Model 2_Model 3'],
              vmin=0.5, vmax=0.99, cmap='Blues')
          .background_gradient(subset=['Mean'], cmap='Reds'))
# Styler.hide_index() was removed in pandas 2.0; use hide(axis='index') when available.
styler = styler.hide(axis='index') if hasattr(styler, 'hide') else styler.hide_index()
display(styler)
| p | d | Model 1_Model 2 | Model 1_Model 3 | Model 2_Model 3 | Mean |
|---|---|---|---|---|---|
| 0.800000 | 5 | 0.300000 | 0.580000 | 0.612000 | 0.497000 |
| 0.833000 | 6 | 0.291000 | 0.528000 | 0.656000 | 0.492000 |
| 0.857000 | 7 | 0.338000 | 0.485000 | 0.634000 | 0.486000 |
| 0.875000 | 8 | 0.377000 | 0.450000 | 0.660000 | 0.496000 |
| 0.889000 | 9 | 0.454000 | 0.507000 | 0.681000 | 0.547000 |
| 0.900000 | 10 | 0.444000 | 0.521000 | 0.661000 | 0.542000 |
| 0.909000 | 11 | 0.435000 | 0.500000 | 0.641000 | 0.525000 |
| 0.917000 | 12 | 0.426000 | 0.480000 | 0.623000 | 0.510000 |
| 0.923000 | 13 | 0.417000 | 0.493000 | 0.635000 | 0.515000 |
| 0.929000 | 14 | 0.408000 | 0.504000 | 0.646000 | 0.519000 |
| 0.933000 | 15 | 0.425000 | 0.490000 | 0.655000 | 0.523000 |
| 0.938000 | 16 | 0.441000 | 0.501000 | 0.641000 | 0.528000 |
| 0.941000 | 17 | 0.455000 | 0.511000 | 0.627000 | 0.531000 |
| 0.944000 | 18 | 0.468000 | 0.500000 | 0.635000 | 0.534000 |
| 0.947000 | 19 | 0.481000 | 0.509000 | 0.623000 | 0.538000 |
| 0.950000 | 20 | 0.512000 | 0.499000 | 0.610000 | 0.540000 |
display(rbos_eli5[rbos_eli5['Mean'] == rbos_eli5['Mean'].max()])
| p | d | Model 1_Model 2 | Model 1_Model 3 | Model 2_Model 3 | Mean | |
|---|---|---|---|---|---|---|
| 4 | 0.889 | 9 | 0.454 | 0.507 | 0.681 | 0.547 |
# Skater (permutation-based) feature importance for Model 1.
# A taller canvas is used because the one-hot encoded data has 61 feature columns.
%matplotlib inline
plt.rcParams['figure.figsize'] = [14, 15]
plt.style.use('ggplot')
warnings.filterwarnings('ignore')
plot = generate_feature_importance_plot(FeatureImportanceType.SKATER, model_1)
display(plot)
13-May-23 13:06:11 - Generating a feature importance plot using SKATER for Model 1 ... 13-May-23 13:06:11 - Initializing Skater - generating new in-memory model. This operation may be time-consuming so please be patient. 2023-05-13 13:06:12,928 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progress_bar=False
[61/61] features ████████████████████ Time elapsed: 3 seconds
# Skater feature importance for Model 2 (decision tree).
plot = generate_feature_importance_plot(FeatureImportanceType.SKATER, model_2)
display(plot)
13-May-23 13:06:17 - Generating a feature importance plot using SKATER for Model 2 ... 13-May-23 13:06:17 - Initializing Skater - generating new in-memory model. This operation may be time-consuming so please be patient. 2023-05-13 13:06:18,383 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progress_bar=False
[61/61] features ████████████████████ Time elapsed: 3 seconds
# Skater feature importance for Model 3 (random forest) — the slowest of the
# four runs (~3 min in the log above).
plot = generate_feature_importance_plot(FeatureImportanceType.SKATER, model_3)
display(plot)
13-May-23 13:06:22 - Generating a feature importance plot using SKATER for Model 3 ... 13-May-23 13:06:22 - Initializing Skater - generating new in-memory model. This operation may be time-consuming so please be patient. 2023-05-13 13:06:29,572 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progress_bar=False
[61/61] features ████████████████████ Time elapsed: 178 seconds
# Skater feature importance for Model 4 (SVC).
plot = generate_feature_importance_plot(FeatureImportanceType.SKATER, model_4)
display(plot)
13-May-23 13:09:28 - Generating a feature importance plot using SKATER for Model 4 ... 13-May-23 13:09:28 - Initializing Skater - generating new in-memory model. This operation may be time-consuming so please be patient. 2023-05-13 13:09:36,257 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progress_bar=False
[61/61] features ████████████████████ Time elapsed: 12 seconds
# RBO similarity between the Skater importance rankings of every model pair,
# for list depths d = 5..20 (step 1).
rbos_skater = calculate_feature_importance_rbo(FeatureImportanceType.SKATER, models, 5, 20, 1)
styler = (rbos_skater.style
          # Pass colormaps by name: cm.get_cmap() is deprecated since
          # matplotlib 3.7 and removed in 3.9.
          .background_gradient(
              subset=['Model 1_Model 2', 'Model 1_Model 3', 'Model 1_Model 4',
                      'Model 2_Model 3', 'Model 2_Model 4', 'Model 3_Model 4'],
              vmin=0.5, vmax=0.99, cmap='Blues')
          .background_gradient(subset=['Mean'], cmap='Reds'))
# Styler.hide_index() was removed in pandas 2.0; use hide(axis='index') when available.
styler = styler.hide(axis='index') if hasattr(styler, 'hide') else styler.hide_index()
display(styler)
| p | d | Model 1_Model 2 | Model 1_Model 3 | Model 1_Model 4 | Model 2_Model 3 | Model 2_Model 4 | Model 3_Model 4 | Mean |
|---|---|---|---|---|---|---|---|---|
| 0.800000 | 5 | 0.692000 | 0.462000 | 0.770000 | 0.770000 | 0.677000 | 0.612000 | 0.664000 |
| 0.833000 | 6 | 0.726000 | 0.578000 | 0.785000 | 0.785000 | 0.658000 | 0.723000 | 0.709000 |
| 0.857000 | 7 | 0.695000 | 0.617000 | 0.742000 | 0.742000 | 0.693000 | 0.701000 | 0.698000 |
| 0.875000 | 8 | 0.715000 | 0.649000 | 0.753000 | 0.753000 | 0.672000 | 0.728000 | 0.712000 |
| 0.889000 | 9 | 0.731000 | 0.676000 | 0.720000 | 0.763000 | 0.693000 | 0.706000 | 0.715000 |
| 0.900000 | 10 | 0.745000 | 0.660000 | 0.690000 | 0.734000 | 0.712000 | 0.723000 | 0.711000 |
| 0.909000 | 11 | 0.757000 | 0.679000 | 0.734000 | 0.777000 | 0.727000 | 0.773000 | 0.741000 |
| 0.917000 | 12 | 0.767000 | 0.696000 | 0.742000 | 0.785000 | 0.741000 | 0.754000 | 0.748000 |
| 0.923000 | 13 | 0.747000 | 0.680000 | 0.720000 | 0.792000 | 0.723000 | 0.765000 | 0.738000 |
| 0.929000 | 14 | 0.728000 | 0.694000 | 0.726000 | 0.772000 | 0.761000 | 0.748000 | 0.738000 |
| 0.933000 | 15 | 0.711000 | 0.704000 | 0.733000 | 0.778000 | 0.744000 | 0.756000 | 0.738000 |
| 0.938000 | 16 | 0.694000 | 0.692000 | 0.715000 | 0.760000 | 0.730000 | 0.789000 | 0.730000 |
| 0.941000 | 17 | 0.692000 | 0.679000 | 0.699000 | 0.760000 | 0.731000 | 0.796000 | 0.726000 |
| 0.944000 | 18 | 0.690000 | 0.687000 | 0.704000 | 0.760000 | 0.733000 | 0.802000 | 0.729000 |
| 0.947000 | 19 | 0.688000 | 0.675000 | 0.729000 | 0.760000 | 0.735000 | 0.808000 | 0.732000 |
| 0.950000 | 20 | 0.686000 | 0.683000 | 0.715000 | 0.759000 | 0.736000 | 0.814000 | 0.732000 |
display(rbos_skater[rbos_skater['Mean'] == rbos_skater['Mean'].max()])
| p | d | Model 1_Model 2 | Model 1_Model 3 | Model 1_Model 4 | Model 2_Model 3 | Model 2_Model 4 | Model 3_Model 4 | Mean | |
|---|---|---|---|---|---|---|---|---|---|
| 7 | 0.917 | 12 | 0.767 | 0.696 | 0.742 | 0.785 | 0.741 | 0.754 | 0.748 |
In the cell below we use the SHAP (SHapley Additive exPlanations). It uses a combination of feature contributions and game theory to come up with SHAP values. Then, it computes the global feature importance by taking the average of the SHAP value magnitudes across the dataset.
# SHAP global feature importance for Model 1.
from shap import initjs
initjs()  # loads the JavaScript SHAP needs for its interactive notebook plots
%matplotlib inline
plt.style.use('ggplot')
warnings.filterwarnings('ignore')
generate_feature_importance_plot(FeatureImportanceType.SHAP, model_1)
13-May-23 13:09:48 - Generating a feature importance plot using SHAP for Model 1 ... 13-May-23 13:09:48 - Initializing Shap - calculating shap values. This operation is time-consuming so please be patient.
generate_feature_importance_plot(FeatureImportanceType.SHAP, model_2)
13-May-23 13:11:08 - Generating a feature importance plot using SHAP for Model 2 ... 13-May-23 13:11:08 - Initializing Shap - calculating shap values. This operation is time-consuming so please be patient.
generate_feature_importance_plot(FeatureImportanceType.SHAP, model_3)
13-May-23 13:12:27 - Generating a feature importance plot using SHAP for Model 3 ... 13-May-23 13:12:27 - Initializing Shap - calculating shap values. This operation is time-consuming so please be patient.
generate_feature_importance_plot(FeatureImportanceType.SHAP, model_4)
13-May-23 15:25:29 - Generating a feature importance plot using SHAP for Model 4 ... 13-May-23 15:25:29 - Initializing Shap - calculating shap values. This operation is time-consuming so please be patient.
# RBO similarity between the SHAP importance rankings of every model pair,
# for list depths d = 5..20 (step 1).
rbos_shap = calculate_feature_importance_rbo(FeatureImportanceType.SHAP, models, 5, 20, 1)
styler = (rbos_shap.style
          # Pass colormaps by name: cm.get_cmap() is deprecated since
          # matplotlib 3.7 and removed in 3.9.
          .background_gradient(
              subset=['Model 1_Model 2', 'Model 1_Model 3', 'Model 1_Model 4',
                      'Model 2_Model 3', 'Model 2_Model 4', 'Model 3_Model 4'],
              vmin=0.5, vmax=0.99, cmap='Blues')
          .background_gradient(subset=['Mean'], cmap='Reds'))
# Styler.hide_index() was removed in pandas 2.0; use hide(axis='index') when available.
styler = styler.hide(axis='index') if hasattr(styler, 'hide') else styler.hide_index()
display(styler)
| p | d | Model 1_Model 2 | Model 1_Model 3 | Model 1_Model 4 | Model 2_Model 3 | Model 2_Model 4 | Model 3_Model 4 | Mean |
|---|---|---|---|---|---|---|---|---|
| 0.800000 | 5 | 0.932000 | 0.770000 | 0.612000 | 0.770000 | 0.570000 | 0.595000 | 0.708000 |
| 0.833000 | 6 | 0.870000 | 0.785000 | 0.590000 | 0.785000 | 0.551000 | 0.642000 | 0.704000 |
| 0.857000 | 7 | 0.818000 | 0.798000 | 0.623000 | 0.742000 | 0.531000 | 0.678000 | 0.698000 |
| 0.875000 | 8 | 0.822000 | 0.810000 | 0.649000 | 0.753000 | 0.609000 | 0.706000 | 0.725000 |
| 0.889000 | 9 | 0.826000 | 0.821000 | 0.671000 | 0.763000 | 0.634000 | 0.686000 | 0.733000 |
| 0.900000 | 10 | 0.869000 | 0.830000 | 0.690000 | 0.773000 | 0.694000 | 0.704000 | 0.760000 |
| 0.909000 | 11 | 0.874000 | 0.838000 | 0.706000 | 0.781000 | 0.677000 | 0.720000 | 0.766000 |
| 0.917000 | 12 | 0.846000 | 0.846000 | 0.753000 | 0.789000 | 0.662000 | 0.767000 | 0.777000 |
| 0.923000 | 13 | 0.820000 | 0.823000 | 0.735000 | 0.796000 | 0.646000 | 0.749000 | 0.761000 |
| 0.929000 | 14 | 0.796000 | 0.801000 | 0.720000 | 0.803000 | 0.686000 | 0.788000 | 0.766000 |
| 0.933000 | 15 | 0.775000 | 0.807000 | 0.704000 | 0.784000 | 0.697000 | 0.771000 | 0.756000 |
| 0.938000 | 16 | 0.753000 | 0.788000 | 0.689000 | 0.766000 | 0.685000 | 0.757000 | 0.740000 |
| 0.941000 | 17 | 0.735000 | 0.792000 | 0.697000 | 0.771000 | 0.693000 | 0.765000 | 0.742000 |
| 0.944000 | 18 | 0.739000 | 0.776000 | 0.705000 | 0.755000 | 0.681000 | 0.772000 | 0.738000 |
| 0.947000 | 19 | 0.722000 | 0.780000 | 0.712000 | 0.740000 | 0.669000 | 0.758000 | 0.730000 |
| 0.950000 | 20 | 0.724000 | 0.783000 | 0.719000 | 0.744000 | 0.677000 | 0.784000 | 0.738000 |
display(rbos_shap[rbos_shap['Mean'] == rbos_shap['Mean'].max()])
| p | d | Model 1_Model 2 | Model 1_Model 3 | Model 1_Model 4 | Model 2_Model 3 | Model 2_Model 4 | Model 3_Model 4 | Mean | |
|---|---|---|---|---|---|---|---|---|---|
| 7 | 0.917 | 12 | 0.846 | 0.846 | 0.753 | 0.789 | 0.662 | 0.767 | 0.777 |
Rank-Biased Overlap (RBO) is a similarity metric used to compare the similarity of two ranked lists based on the degree of overlap between their items. The RBO score ranges from 0 to 1, where 1 represents complete overlap between the two lists, and 0 indicates no overlap. With the RBO method we will compare the different feature importance lists for the same model.
# Compare the three importance-ranking methods pairwise for Model 1,
# for list depths d = 5..25 (step 1).
rbos_model_1 = calculate_model_rbo(model_1, 5, 25, 1)
styler = (rbos_model_1.style
          # Pass colormaps by name: cm.get_cmap() is deprecated since
          # matplotlib 3.7 and removed in 3.9.
          .background_gradient(
              subset=['ELI5_SKATER', 'ELI5_SHAP', 'SKATER_SHAP'],
              vmin=0.5, vmax=0.99, cmap='Blues')
          .background_gradient(subset=['Mean'], cmap='Reds'))
# Styler.hide_index() was removed in pandas 2.0; use hide(axis='index') when available.
styler = styler.hide(axis='index') if hasattr(styler, 'hide') else styler.hide_index()
display(styler)
| p | d | ELI5_SKATER | ELI5_SHAP | SKATER_SHAP | Mean |
|---|---|---|---|---|---|
| 0.800000 | 5 | 0.300000 | 0.300000 | 0.692000 | 0.431000 |
| 0.833000 | 6 | 0.358000 | 0.358000 | 0.793000 | 0.503000 |
| 0.857000 | 7 | 0.349000 | 0.405000 | 0.762000 | 0.505000 |
| 0.875000 | 8 | 0.387000 | 0.395000 | 0.831000 | 0.538000 |
| 0.889000 | 9 | 0.421000 | 0.428000 | 0.804000 | 0.551000 |
| 0.900000 | 10 | 0.449000 | 0.496000 | 0.818000 | 0.588000 |
| 0.909000 | 11 | 0.440000 | 0.486000 | 0.829000 | 0.585000 |
| 0.917000 | 12 | 0.463000 | 0.477000 | 0.838000 | 0.593000 |
| 0.923000 | 13 | 0.453000 | 0.496000 | 0.846000 | 0.598000 |
| 0.929000 | 14 | 0.472000 | 0.487000 | 0.854000 | 0.604000 |
| 0.933000 | 15 | 0.488000 | 0.503000 | 0.860000 | 0.617000 |
| 0.938000 | 16 | 0.504000 | 0.519000 | 0.867000 | 0.630000 |
| 0.941000 | 17 | 0.517000 | 0.532000 | 0.893000 | 0.647000 |
| 0.944000 | 18 | 0.509000 | 0.523000 | 0.877000 | 0.636000 |
| 0.947000 | 19 | 0.522000 | 0.535000 | 0.881000 | 0.646000 |
| 0.950000 | 20 | 0.553000 | 0.547000 | 0.867000 | 0.656000 |
| 0.952000 | 21 | 0.545000 | 0.557000 | 0.870000 | 0.657000 |
| 0.955000 | 22 | 0.557000 | 0.568000 | 0.874000 | 0.666000 |
| 0.957000 | 23 | 0.550000 | 0.578000 | 0.894000 | 0.674000 |
| 0.958000 | 24 | 0.557000 | 0.585000 | 0.896000 | 0.679000 |
| 0.960000 | 25 | 0.567000 | 0.579000 | 0.914000 | 0.687000 |
display(rbos_model_1[rbos_model_1['Mean'] == rbos_model_1['Mean'].max()])
| p | d | ELI5_SKATER | ELI5_SHAP | SKATER_SHAP | Mean | |
|---|---|---|---|---|---|---|
| 20 | 0.96 | 25 | 0.567 | 0.579 | 0.914 | 0.687 |
# Compare the three importance-ranking methods pairwise for Model 2.
# NOTE(review): the depth upper bound is None here while the other models use 25 —
# presumably the helper then derives a default (the output stops at d = 16); confirm.
rbos_model_2 = calculate_model_rbo(model_2, 5, None, 1)
styler = (rbos_model_2.style
          # Pass colormaps by name: cm.get_cmap() is deprecated since
          # matplotlib 3.7 and removed in 3.9.
          .background_gradient(
              subset=['ELI5_SKATER', 'ELI5_SHAP', 'SKATER_SHAP'],
              vmin=0.5, vmax=0.99, cmap='Blues')
          .background_gradient(subset=['Mean'], cmap='Reds'))
# Styler.hide_index() was removed in pandas 2.0; use hide(axis='index') when available.
styler = styler.hide(axis='index') if hasattr(styler, 'hide') else styler.hide_index()
display(styler)
| p | d | ELI5_SKATER | ELI5_SHAP | SKATER_SHAP | Mean |
|---|---|---|---|---|---|
| 0.800000 | 5 | 0.892000 | 0.850000 | 0.932000 | 0.891000 |
| 0.833000 | 6 | 0.960000 | 0.854000 | 0.870000 | 0.895000 |
| 0.857000 | 7 | 0.962000 | 0.859000 | 0.875000 | 0.899000 |
| 0.875000 | 8 | 0.915000 | 0.865000 | 0.928000 | 0.903000 |
| 0.889000 | 9 | 0.961000 | 0.913000 | 0.933000 | 0.936000 |
| 0.900000 | 10 | 0.963000 | 0.879000 | 0.899000 | 0.914000 |
| 0.909000 | 11 | 0.930000 | 0.884000 | 0.937000 | 0.917000 |
| 0.917000 | 12 | 0.899000 | 0.856000 | 0.909000 | 0.888000 |
| 0.923000 | 13 | 0.872000 | 0.831000 | 0.882000 | 0.862000 |
| 0.929000 | 14 | 0.845000 | 0.834000 | 0.858000 | 0.846000 |
| 0.933000 | 15 | 0.873000 | 0.837000 | 0.860000 | 0.857000 |
| 0.938000 | 16 | 0.849000 | 0.840000 | 0.838000 | 0.842000 |
display(rbos_model_2[rbos_model_2['Mean'] == rbos_model_2['Mean'].max()])
| p | d | ELI5_SKATER | ELI5_SHAP | SKATER_SHAP | Mean | |
|---|---|---|---|---|---|---|
| 4 | 0.889 | 9 | 0.961 | 0.913 | 0.933 | 0.936 |
# Compare the three importance-ranking methods pairwise for Model 3,
# for list depths d = 5..25 (step 1).
rbos_model_3 = calculate_model_rbo(model_3, 5, 25, 1)
styler = (rbos_model_3.style
          # Pass colormaps by name: cm.get_cmap() is deprecated since
          # matplotlib 3.7 and removed in 3.9.
          .background_gradient(
              subset=['ELI5_SKATER', 'ELI5_SHAP', 'SKATER_SHAP'],
              vmin=0.5, vmax=0.99, cmap='Blues')
          .background_gradient(subset=['Mean'], cmap='Reds'))
# Styler.hide_index() was removed in pandas 2.0; use hide(axis='index') when available.
styler = styler.hide(axis='index') if hasattr(styler, 'hide') else styler.hide_index()
display(styler)
| p | d | ELI5_SKATER | ELI5_SHAP | SKATER_SHAP | Mean |
|---|---|---|---|---|---|
| 0.800000 | 5 | 0.652000 | 0.595000 | 0.770000 | 0.672000 |
| 0.833000 | 6 | 0.701000 | 0.709000 | 0.852000 | 0.754000 |
| 0.857000 | 7 | 0.738000 | 0.689000 | 0.809000 | 0.745000 |
| 0.875000 | 8 | 0.718000 | 0.717000 | 0.870000 | 0.768000 |
| 0.889000 | 9 | 0.741000 | 0.739000 | 0.880000 | 0.787000 |
| 0.900000 | 10 | 0.760000 | 0.758000 | 0.889000 | 0.802000 |
| 0.909000 | 11 | 0.776000 | 0.738000 | 0.862000 | 0.792000 |
| 0.917000 | 12 | 0.790000 | 0.752000 | 0.869000 | 0.804000 |
| 0.923000 | 13 | 0.802000 | 0.763000 | 0.874000 | 0.813000 |
| 0.929000 | 14 | 0.785000 | 0.802000 | 0.852000 | 0.813000 |
| 0.933000 | 15 | 0.793000 | 0.810000 | 0.857000 | 0.820000 |
| 0.938000 | 16 | 0.803000 | 0.796000 | 0.885000 | 0.828000 |
| 0.941000 | 17 | 0.809000 | 0.802000 | 0.867000 | 0.826000 |
| 0.944000 | 18 | 0.836000 | 0.829000 | 0.891000 | 0.852000 |
| 0.947000 | 19 | 0.823000 | 0.836000 | 0.894000 | 0.851000 |
| 0.950000 | 20 | 0.810000 | 0.823000 | 0.898000 | 0.844000 |
| 0.952000 | 21 | 0.797000 | 0.810000 | 0.883000 | 0.830000 |
| 0.955000 | 22 | 0.819000 | 0.816000 | 0.886000 | 0.840000 |
| 0.957000 | 23 | 0.824000 | 0.804000 | 0.888000 | 0.839000 |
| 0.958000 | 24 | 0.827000 | 0.808000 | 0.890000 | 0.842000 |
| 0.960000 | 25 | 0.831000 | 0.827000 | 0.893000 | 0.850000 |
display(rbos_model_3[rbos_model_3['Mean'] == rbos_model_3['Mean'].max()])
| p | d | ELI5_SKATER | ELI5_SHAP | SKATER_SHAP | Mean | |
|---|---|---|---|---|---|---|
| 13 | 0.944 | 18 | 0.836 | 0.829 | 0.891 | 0.852 |
# Compare the importance-ranking methods pairwise for Model 4.
# Only the Skater-vs-SHAP pair appears, since no ELI5 ranking was produced for
# the SVC (that cell was skipped earlier).
rbos_model_4 = calculate_model_rbo(model_4, 5, 25, 1)
styler = (rbos_model_4.style
          # Pass colormaps by name: cm.get_cmap() is deprecated since
          # matplotlib 3.7 and removed in 3.9.
          .background_gradient(
              subset=['SKATER_SHAP'],
              vmin=0.5, vmax=0.99, cmap='Blues')
          .background_gradient(subset=['Mean'], cmap='Reds'))
# Styler.hide_index() was removed in pandas 2.0; use hide(axis='index') when available.
styler = styler.hide(axis='index') if hasattr(styler, 'hide') else styler.hide_index()
display(styler)
| p | d | SKATER_SHAP | Mean |
|---|---|---|---|
| 0.800000 | 5 | 0.490000 | 0.490000 |
| 0.833000 | 6 | 0.548000 | 0.548000 |
| 0.857000 | 7 | 0.594000 | 0.594000 |
| 0.875000 | 8 | 0.679000 | 0.679000 |
| 0.889000 | 9 | 0.710000 | 0.710000 |
| 0.900000 | 10 | 0.695000 | 0.695000 |
| 0.909000 | 11 | 0.681000 | 0.681000 |
| 0.917000 | 12 | 0.699000 | 0.699000 |
| 0.923000 | 13 | 0.714000 | 0.714000 |
| 0.929000 | 14 | 0.756000 | 0.756000 |
| 0.933000 | 15 | 0.767000 | 0.767000 |
| 0.938000 | 16 | 0.779000 | 0.779000 |
| 0.941000 | 17 | 0.787000 | 0.787000 |
| 0.944000 | 18 | 0.775000 | 0.775000 |
| 0.947000 | 19 | 0.783000 | 0.783000 |
| 0.950000 | 20 | 0.791000 | 0.791000 |
| 0.952000 | 21 | 0.797000 | 0.797000 |
| 0.955000 | 22 | 0.805000 | 0.805000 |
| 0.957000 | 23 | 0.794000 | 0.794000 |
| 0.958000 | 24 | 0.814000 | 0.814000 |
| 0.960000 | 25 | 0.820000 | 0.820000 |
display(rbos_model_4[rbos_model_4['Mean'] == rbos_model_4['Mean'].max()])
| p | d | SKATER_SHAP | Mean | |
|---|---|---|---|---|
| 20 | 0.96 | 25 | 0.82 | 0.82 |
The partial dependence plot (short PDP or PD plot) shows the marginal effect one or two features have on the predicted outcome of a machine learning model. A partial dependence plot can show whether the relationship between the target and a feature is linear, monotonic or more complex. For example, when applied to a linear regression model, partial dependence plots always show a linear relationship.
PDPBox is the first module that we use for plotting partial dependence. We will generate two plots: one for only one feature - age - and one for two features - age and education-num.
_ = generate_pdp_plots(PDPType.PDPBox, model_1, "age", "None")
13-May-23 15:55:50 - Generating a PDP plot using PDPBox for Model 1 ...
_ = generate_pdp_plots(PDPType.PDPBox, model_1, "age", "education-num")
13-May-23 15:55:55 - Generating a PDP plot using PDPBox for Model 1 ...
_ = generate_pdp_plots(PDPType.PDPBox, model_2, "age", "None")
13-May-23 15:55:56 - Generating a PDP plot using PDPBox for Model 2 ...
_ = generate_pdp_plots(PDPType.PDPBox, model_2, "age", "education-num")
13-May-23 15:56:01 - Generating a PDP plot using PDPBox for Model 2 ...
_ = generate_pdp_plots(PDPType.PDPBox, model_3, "age", "None")
13-May-23 15:56:01 - Generating a PDP plot using PDPBox for Model 3 ...
_ = generate_pdp_plots(PDPType.PDPBox, model_3, "age", "education-num")
13-May-23 15:56:33 - Generating a PDP plot using PDPBox for Model 3 ...
_ = generate_pdp_plots(PDPType.PDPBox, model_4, "age", "None")
13-May-23 15:59:58 - Generating a PDP plot using PDPBox for Model 4 ...
_ = generate_pdp_plots(PDPType.PDPBox, model_4, "age", "education-num")
13-May-23 16:00:32 - Generating a PDP plot using PDPBox for Model 4 ...
In the two examples below we will use Skater and SHAP for generating PDPs using features: age and education-num.
# Skater two-feature PDP ("age" x "education-num") for Model 1.
%matplotlib inline
plt.style.use('ggplot')
warnings.filterwarnings('ignore')
_ = generate_pdp_plots(PDPType.SKATER, model_1, "age", "education-num")
13-May-23 16:04:14 - Generating a PDP plot using SKATER for Model 1 ... 2023-05-13 16:04:15,099 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progressbar=False
[1152/1152] grid cells ████████████████████ Time elapsed: 30 seconds
_ = generate_pdp_plots(PDPType.SKATER, model_2, "age", "education-num")
13-May-23 16:04:46 - Generating a PDP plot using SKATER for Model 2 ... 2023-05-13 16:04:47,510 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progressbar=False
[1152/1152] grid cells ████████████████████ Time elapsed: 32 seconds
_ = generate_pdp_plots(PDPType.SKATER, model_3, "age", "education-num")
13-May-23 16:05:20 - Generating a PDP plot using SKATER for Model 3 ... 2023-05-13 16:05:21,440 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progressbar=False
[1152/1152] grid cells ████████████████████ Time elapsed: 3465 seconds
_ = generate_pdp_plots(PDPType.SKATER, model_4, "age", "education-num")
13-May-23 17:03:07 - Generating a PDP plot using SKATER for Model 4 ... 2023-05-13 17:03:08,681 - skater.core.explanations - WARNING - Progress bars slow down runs by 10-20%. For slightly faster runs, do progressbar=False
[1152/1152] grid cells ████████████████████ Time elapsed: 63 seconds
_ = generate_pdp_plots(PDPType.SHAP, model_1, "age", "education-num")
13-May-23 17:04:12 - Generating a PDP plot using SHAP for Model 1 ...
_ = generate_pdp_plots(PDPType.SHAP, model_2, "age", "education-num")
13-May-23 17:04:12 - Generating a PDP plot using SHAP for Model 2 ...
_ = generate_pdp_plots(PDPType.SHAP, model_3, "age", "education-num")
13-May-23 17:04:12 - Generating a PDP plot using SHAP for Model 3 ...
_ = generate_pdp_plots(PDPType.SHAP, model_4, "age", "education-num")
13-May-23 17:04:13 - Generating a PDP plot using SHAP for Model 4 ...
Local interpretation focuses on the specifics of each individual and provides explanations that can lead to a better understanding of the feature contributions in smaller groups of individuals, which are often overlooked by global interpretation techniques. We will use two modules for interpreting single instances: SHAP and LIME.
SHAP leverages the idea of Shapley values for model feature influence scoring. The technical definition of a Shapley value is the “average marginal contribution of a feature value over all possible coalitions.” In other words, Shapley values consider all possible predictions for an instance using all possible combinations of inputs. Because of this exhaustive approach, SHAP can guarantee properties like consistency and local accuracy. LIME, on the other hand, does not offer such guarantees.
LIME (Local Interpretable Model-agnostic Explanations) builds sparse linear models around each prediction to explain how the black-box model behaves in that local vicinity. While treating the model as a black box, we perturb the instance we want to explain and learn a sparse linear model around it as the explanation. LIME has the advantage over SHAP that it is considerably faster.
# Pick one falsely-classified test example per model, ensuring no example is
# chosen twice, so each local explanation below covers a distinct instance.
examples = []
example_types = [ExampleType.FALSELY_CLASSIFIED]
for example_type in example_types:
    for model in models:
        # Re-draw until the candidate has not already been selected for
        # another model.
        candidate = get_test_examples(model, example_type, 1)[0]
        while candidate in examples:
            candidate = get_test_examples(model, example_type, 1)[0]
        examples.append(candidate)
display(examples)
[1528, 7466, 9211, 8826]
example = examples[0]
print(get_example_information(model_1, example))
print(generate_single_instance_comparison(models, example))
Example 1528's data: age 25 workclass Private education Bachelors education-num 13 marital-status Married-civ-spouse occupation Adm-clerical relationship Husband ethnicity White gender Male capital-gain 0 capital-loss 0 hours-per-week 45 Name: 24384, dtype: object Actual result for example 1528: >50K Example 1528 was truly classified by no model and falsely classified by Model 1, Model 2, Model 3, Model 4. For further clarification see the explanations below.
# LIME: build a local surrogate explanation (with optimized kernel width) for
# this example under model_1, render the interactive widget, then print a
# textual summary of the feature contributions.
explanation = explain_single_instance(LocalInterpreterType.OPTIMIZED_LIME, model_1, example)
explanation.show_in_notebook(show_table=True, show_all=True)
print(generate_single_instance_explanation(LocalInterpreterType.OPTIMIZED_LIME, model_1, example))
# SHAP: compute the SHAP explanation for the same example, display the
# explanation object, then print a textual summary.
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_1, example)
display(explanation)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_1, example))
13-May-23 17:04:19 - Generating a single instance explanation using OPTIMIZED_LIME for Model 1 ... 13-May-23 17:04:19 - Initializing LIME - generating new explainer for example 1528 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 17:11:57 - The optimal kernel width for example 1528 and Model 1 is 1.5826530612244896. Variables Stability Index (VSI): 92.0 Coefficients Stability Index (CSI): 100.0
13-May-23 17:12:05 - Generating a single instance explanation using SHAP for Model 1 ...
The prediction probability of Model 1's decision for this example is 0.58. LIME's explanation: The most impactful feature for Model 1's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1959. The 2nd most impactful feature for Model 1's positive (1) prediction probability is 10.00 < education-num <= 13.00 with value of 0.0987. The 3rd most impactful feature for Model 1's positive (1) prediction probability is gender= Male with value of 0.0841. The 4th most impactful feature for Model 1's positive (1) prediction probability is ethnicity= White with value of 0.0277. The 5th most impactful feature for Model 1's positive (1) prediction probability is workclass= Private with value of 0.0137. The 6th most impactful feature for Model 1's positive (1) prediction probability is 40.00 < hours-per-week <= 45.00 with value of 0.0107. The 7th most impactful feature for Model 1's positive (1) prediction probability is relationship= Husband with value of 0.0008. The most impactful feature for Model 1's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.638. The 2nd most impactful feature for Model 1's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.1467. The 3rd most impactful feature for Model 1's negative (0) prediction probability is age <= 28.00 with value of -0.0489. The 4th most impactful feature for Model 1's negative (0) prediction probability is occupation= Adm-clerical with value of -0.0174. The 5th most impactful feature for Model 1's negative (0) prediction probability is education= Bachelors with value of -0.012.
The prediction probability of Model 1's decision for this example is 0.58. SHAP's explanation: The most impactful feature for Model 1's positive (1) prediction probability is age = 25 (-1.035997988093181) with value of 0.0582. The 2nd most impactful feature for Model 1's positive (1) prediction probability is capital-gain = 0 (-0.14721010722130554) with value of 0.057. The 3rd most impactful feature for Model 1's positive (1) prediction probability is education_ Bachelors = 1 with value of 0.0032. The most impactful feature for Model 1's negative (0) prediction probability is marital-status_ Married-civ-spouse = 1 with value of -0.2354. The 2nd most impactful feature for Model 1's negative (0) prediction probability is education-num = 13 (1.125092240761039) with value of -0.1473. The 3rd most impactful feature for Model 1's negative (0) prediction probability is occupation_ Adm-clerical = 1 with value of -0.0248. The 4th most impactful feature for Model 1's negative (0) prediction probability is hours-per-week = 45 (0.3347364227836373) with value of -0.019. The 5th most impactful feature for Model 1's negative (0) prediction probability is relationship_ Husband = 1 with value of -0.0026.
# LIME: local surrogate explanation (optimized kernel width) of this example
# under model_2 — widget plus textual summary.
explanation = explain_single_instance(LocalInterpreterType.OPTIMIZED_LIME, model_2, example)
explanation.show_in_notebook(show_table=True, show_all=True)
print(generate_single_instance_explanation(LocalInterpreterType.OPTIMIZED_LIME, model_2, example))
# SHAP: SHAP explanation of the same example under model_2 — display plus
# textual summary.
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_2, example)
display(explanation)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_2, example))
13-May-23 17:12:05 - Generating a single instance explanation using OPTIMIZED_LIME for Model 2 ... 13-May-23 17:12:05 - Initializing LIME - generating new explainer for example 1528 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 17:19:41 - The optimal kernel width for example 1528 and Model 2 is 3.215306122448979. Variables Stability Index (VSI): 93.46 Coefficients Stability Index (CSI): 92.89
13-May-23 17:19:48 - Generating a single instance explanation using SHAP for Model 2 ...
The prediction probability of Model 2's decision for this example is 0.68. LIME's explanation: The most impactful feature for Model 2's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1897. The 2nd most impactful feature for Model 2's positive (1) prediction probability is 10.00 < education-num <= 13.00 with value of 0.0784. The 3rd most impactful feature for Model 2's positive (1) prediction probability is 40.00 < hours-per-week <= 45.00 with value of 0.0411. The 4th most impactful feature for Model 2's positive (1) prediction probability is gender= Male with value of 0.0028. The most impactful feature for Model 2's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.5269. The 2nd most impactful feature for Model 2's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.0889. The 3rd most impactful feature for Model 2's negative (0) prediction probability is age <= 28.00 with value of -0.0729. The 4th most impactful feature for Model 2's negative (0) prediction probability is education= Bachelors with value of -0.0199. The 5th most impactful feature for Model 2's negative (0) prediction probability is relationship= Husband with value of -0.0108. The 6th most impactful feature for Model 2's negative (0) prediction probability is occupation= Adm-clerical with value of -0.0089. The 7th most impactful feature for Model 2's negative (0) prediction probability is workclass= Private with value of -0.0058. The 8th most impactful feature for Model 2's negative (0) prediction probability is ethnicity= White with value of -0.0006.
The prediction probability of Model 2's decision for this example is 0.68. SHAP's explanation: The most impactful feature for Model 2's positive (1) prediction probability is age = 25 (-1.035997988093181) with value of 0.1803. The 2nd most impactful feature for Model 2's positive (1) prediction probability is occupation_ Adm-clerical = 1 with value of 0.0056. The most impactful feature for Model 2's negative (0) prediction probability is marital-status_ Married-civ-spouse = 1 with value of -0.3159. The 2nd most impactful feature for Model 2's negative (0) prediction probability is education-num = 13 (1.125092240761039) with value of -0.0878. The 3rd most impactful feature for Model 2's negative (0) prediction probability is hours-per-week = 45 (0.3347364227836373) with value of -0.0263.
# LIME: local surrogate explanation (optimized kernel width) of this example
# under model_3 — widget plus textual summary.
explanation = explain_single_instance(LocalInterpreterType.OPTIMIZED_LIME, model_3, example)
explanation.show_in_notebook(show_table=True, show_all=True)
print(generate_single_instance_explanation(LocalInterpreterType.OPTIMIZED_LIME, model_3, example))
# SHAP: SHAP explanation of the same example under model_3 — display plus
# textual summary.
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_3, example)
display(explanation)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_3, example))
13-May-23 17:19:49 - Generating a single instance explanation using OPTIMIZED_LIME for Model 3 ... 13-May-23 17:19:49 - Initializing LIME - generating new explainer for example 1528 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 17:53:33 - The optimal kernel width for example 1528 and Model 3 is 1.3561224489795916. Variables Stability Index (VSI): 88.0 Coefficients Stability Index (CSI): 100.0
13-May-23 17:54:12 - Generating a single instance explanation using SHAP for Model 3 ...
The prediction probability of Model 3's decision for this example is 0.55. LIME's explanation: The most impactful feature for Model 3's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1418. The 2nd most impactful feature for Model 3's positive (1) prediction probability is relationship= Husband with value of 0.0641. The 3rd most impactful feature for Model 3's positive (1) prediction probability is 10.00 < education-num <= 13.00 with value of 0.0471. The 4th most impactful feature for Model 3's positive (1) prediction probability is 40.00 < hours-per-week <= 45.00 with value of 0.0377. The 5th most impactful feature for Model 3's positive (1) prediction probability is education= Bachelors with value of 0.0345. The 6th most impactful feature for Model 3's positive (1) prediction probability is gender= Male with value of 0.0164. The 7th most impactful feature for Model 3's positive (1) prediction probability is ethnicity= White with value of 0.0119. The most impactful feature for Model 3's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.4602. The 2nd most impactful feature for Model 3's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.0893. The 3rd most impactful feature for Model 3's negative (0) prediction probability is age <= 28.00 with value of -0.0838. The 4th most impactful feature for Model 3's negative (0) prediction probability is occupation= Adm-clerical with value of -0.0183. The 5th most impactful feature for Model 3's negative (0) prediction probability is workclass= Private with value of -0.0111.
The prediction probability of Model 3's decision for this example is 0.55. SHAP's explanation: The most impactful feature for Model 3's positive (1) prediction probability is age = 25 (-1.035997988093181) with value of 0.1282. The 2nd most impactful feature for Model 3's positive (1) prediction probability is occupation_ Adm-clerical = 1 with value of 0.0019. The most impactful feature for Model 3's negative (0) prediction probability is marital-status_ Married-civ-spouse = 1 with value of -0.1831. The 2nd most impactful feature for Model 3's negative (0) prediction probability is education-num = 13 (1.125092240761039) with value of -0.1117. The 3rd most impactful feature for Model 3's negative (0) prediction probability is relationship_ Husband = 1 with value of -0.1057. The 4th most impactful feature for Model 3's negative (0) prediction probability is hours-per-week = 45 (0.3347364227836373) with value of -0.0622. The 5th most impactful feature for Model 3's negative (0) prediction probability is education_ Bachelors = 1 with value of -0.0375. The 6th most impactful feature for Model 3's negative (0) prediction probability is capital-gain = 0 (-0.14721010722130554) with value of -0.0021.
# LIME: local surrogate explanation (optimized kernel width) of this example
# under model_4 — widget plus textual summary.
explanation = explain_single_instance(LocalInterpreterType.OPTIMIZED_LIME, model_4, example)
explanation.show_in_notebook(show_table=True, show_all=True)
print(generate_single_instance_explanation(LocalInterpreterType.OPTIMIZED_LIME, model_4, example))
# SHAP: SHAP explanation of the same example under model_4 — display plus
# textual summary.
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_4, example)
display(explanation)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_4, example))
13-May-23 17:54:17 - Generating a single instance explanation using OPTIMIZED_LIME for Model 4 ... 13-May-23 17:54:17 - Initializing LIME - generating new explainer for example 1528 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 18:22:26 - The optimal kernel width for example 1528 and Model 4 is 2.6622448979591833. Variables Stability Index (VSI): 92.05 Coefficients Stability Index (CSI): 92.89
13-May-23 18:22:58 - Generating a single instance explanation using SHAP for Model 4 ...
The prediction probability of Model 4's decision for this example is 0.62. LIME's explanation: The most impactful feature for Model 4's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.0722. The 2nd most impactful feature for Model 4's positive (1) prediction probability is 10.00 < education-num <= 13.00 with value of 0.0608. The 3rd most impactful feature for Model 4's positive (1) prediction probability is relationship= Husband with value of 0.0392. The 4th most impactful feature for Model 4's positive (1) prediction probability is 40.00 < hours-per-week <= 45.00 with value of 0.0178. The 5th most impactful feature for Model 4's positive (1) prediction probability is ethnicity= White with value of 0.0147. The 6th most impactful feature for Model 4's positive (1) prediction probability is education= Bachelors with value of 0.0114. The 7th most impactful feature for Model 4's positive (1) prediction probability is gender= Male with value of 0.0088. The 8th most impactful feature for Model 4's positive (1) prediction probability is workclass= Private with value of 0.0007. The most impactful feature for Model 4's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.5525. The 2nd most impactful feature for Model 4's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.1316. The 3rd most impactful feature for Model 4's negative (0) prediction probability is age <= 28.00 with value of -0.061. The 4th most impactful feature for Model 4's negative (0) prediction probability is occupation= Adm-clerical with value of -0.0135.
The prediction probability of Model 4's decision for this example is 0.62. SHAP's explanation: The most impactful feature for Model 4's positive (1) prediction probability is age = 25 (-1.035997988093181) with value of 0.1354. The 2nd most impactful feature for Model 4's positive (1) prediction probability is capital-gain = 0 (-0.14721010722130554) with value of 0.0347. The most impactful feature for Model 4's negative (0) prediction probability is marital-status_ Married-civ-spouse = 1 with value of -0.1286. The 2nd most impactful feature for Model 4's negative (0) prediction probability is education-num = 13 (1.125092240761039) with value of -0.1118. The 3rd most impactful feature for Model 4's negative (0) prediction probability is relationship_ Husband = 1 with value of -0.0785. The 4th most impactful feature for Model 4's negative (0) prediction probability is education_ Bachelors = 1 with value of -0.077. The 5th most impactful feature for Model 4's negative (0) prediction probability is hours-per-week = 45 (0.3347364227836373) with value of -0.0313. The 6th most impactful feature for Model 4's negative (0) prediction probability is occupation_ Adm-clerical = 1 with value of -0.029.
example = examples[1]
print(get_example_information(model_1, example))
print(generate_single_instance_comparison(models, example))
Example 7466's data: age 28 workclass Private education Bachelors education-num 13 marital-status Married-civ-spouse occupation Tech-support relationship Husband ethnicity White gender Male capital-gain 0 capital-loss 0 hours-per-week 40 Name: 10677, dtype: object Actual result for example 7466: >50K Example 7466 was truly classified by Model 1, Model 4 and falsely classified by Model 2, Model 3. For further clarification see the explanations below.
# LIME: local surrogate explanation (optimized kernel width) of the second
# selected example under model_1 — widget plus textual summary.
explanation = explain_single_instance(LocalInterpreterType.OPTIMIZED_LIME, model_1, example)
explanation.show_in_notebook(show_table=True, show_all=True)
print(generate_single_instance_explanation(LocalInterpreterType.OPTIMIZED_LIME, model_1, example))
# SHAP: SHAP explanation of the same example under model_1 — display plus
# textual summary.
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_1, example)
display(explanation)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_1, example))
13-May-23 18:23:04 - Generating a single instance explanation using OPTIMIZED_LIME for Model 1 ... 13-May-23 18:23:04 - Initializing LIME - generating new explainer for example 7466 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 18:30:17 - The optimal kernel width for example 7466 and Model 1 is 1.0928571428571427. Variables Stability Index (VSI): 92.96 Coefficients Stability Index (CSI): 92.89
13-May-23 18:30:24 - Generating a single instance explanation using SHAP for Model 1 ...
The prediction probability of Model 1's decision for this example is 0.55. LIME's explanation: The most impactful feature for Model 1's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1978. The 2nd most impactful feature for Model 1's positive (1) prediction probability is 10.00 < education-num <= 13.00 with value of 0.1028. The 3rd most impactful feature for Model 1's positive (1) prediction probability is gender= Male with value of 0.0869. The 4th most impactful feature for Model 1's positive (1) prediction probability is occupation= Tech-support with value of 0.0509. The 5th most impactful feature for Model 1's positive (1) prediction probability is ethnicity= White with value of 0.0288. The 6th most impactful feature for Model 1's positive (1) prediction probability is workclass= Private with value of 0.0142. The 7th most impactful feature for Model 1's positive (1) prediction probability is relationship= Husband with value of 0.0017. The most impactful feature for Model 1's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.6303. The 2nd most impactful feature for Model 1's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.1531. The 3rd most impactful feature for Model 1's negative (0) prediction probability is hours-per-week <= 40.00 with value of -0.08. The 4th most impactful feature for Model 1's negative (0) prediction probability is age <= 28.00 with value of -0.054. The 5th most impactful feature for Model 1's negative (0) prediction probability is education= Bachelors with value of -0.0106.
The prediction probability of Model 1's decision for this example is 0.55. SHAP's explanation: The most impactful feature for Model 1's positive (1) prediction probability is marital-status_ Married-civ-spouse = 1 with value of 0.2574. The 2nd most impactful feature for Model 1's positive (1) prediction probability is education-num = 13 (1.125092240761039) with value of 0.1588. The 3rd most impactful feature for Model 1's positive (1) prediction probability is occupation_ Tech-support = 1 with value of 0.1353. The 4th most impactful feature for Model 1's positive (1) prediction probability is relationship_ Husband = 1 with value of 0.0027. The most impactful feature for Model 1's negative (0) prediction probability is capital-gain = 0 (-0.14721010722130554) with value of -0.0596. The 2nd most impactful feature for Model 1's negative (0) prediction probability is age = 28 (-0.8082945381535962) with value of -0.0479. The 3rd most impactful feature for Model 1's negative (0) prediction probability is hours-per-week = 40 (-0.0792926836952247) with value of -0.005. The 4th most impactful feature for Model 1's negative (0) prediction probability is education_ Bachelors = 1 with value of -0.0033.
# LIME: local surrogate explanation (optimized kernel width) of this example
# under model_2 — widget plus textual summary.
explanation = explain_single_instance(LocalInterpreterType.OPTIMIZED_LIME, model_2, example)
explanation.show_in_notebook(show_table=True, show_all=True)
print(generate_single_instance_explanation(LocalInterpreterType.OPTIMIZED_LIME, model_2, example))
# SHAP: SHAP explanation of the same example under model_2 — display plus
# textual summary.
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_2, example)
display(explanation)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_2, example))
13-May-23 18:30:24 - Generating a single instance explanation using OPTIMIZED_LIME for Model 2 ... 13-May-23 18:30:24 - Initializing LIME - generating new explainer for example 7466 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 18:38:00 - The optimal kernel width for example 7466 and Model 2 is 3.215306122448979. Variables Stability Index (VSI): 92.0 Coefficients Stability Index (CSI): 100.0
13-May-23 18:38:07 - Generating a single instance explanation using SHAP for Model 2 ...
The prediction probability of Model 2's decision for this example is 0.68. LIME's explanation: The most impactful feature for Model 2's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1896. The 2nd most impactful feature for Model 2's positive (1) prediction probability is 10.00 < education-num <= 13.00 with value of 0.0791. The 3rd most impactful feature for Model 2's positive (1) prediction probability is occupation= Tech-support with value of 0.0174. The 4th most impactful feature for Model 2's positive (1) prediction probability is gender= Male with value of 0.0033. The most impactful feature for Model 2's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.5275. The 2nd most impactful feature for Model 2's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.0884. The 3rd most impactful feature for Model 2's negative (0) prediction probability is age <= 28.00 with value of -0.0723. The 4th most impactful feature for Model 2's negative (0) prediction probability is hours-per-week <= 40.00 with value of -0.068. The 5th most impactful feature for Model 2's negative (0) prediction probability is education= Bachelors with value of -0.0193. The 6th most impactful feature for Model 2's negative (0) prediction probability is relationship= Husband with value of -0.01. The 7th most impactful feature for Model 2's negative (0) prediction probability is workclass= Private with value of -0.0062. The 8th most impactful feature for Model 2's negative (0) prediction probability is ethnicity= White with value of -0.0019.
The prediction probability of Model 2's decision for this example is 0.68. SHAP's explanation: The most impactful feature for Model 2's positive (1) prediction probability is age = 28 (-0.8082945381535962) with value of 0.143. The 2nd most impactful feature for Model 2's positive (1) prediction probability is hours-per-week = 40 (-0.0792926836952247) with value of 0.0203. The most impactful feature for Model 2's negative (0) prediction probability is marital-status_ Married-civ-spouse = 1 with value of -0.3374. The 2nd most impactful feature for Model 2's negative (0) prediction probability is education-num = 13 (1.125092240761039) with value of -0.07.
# LIME: local surrogate explanation (optimized kernel width) of this example
# under model_3 — widget plus textual summary.
explanation = explain_single_instance(LocalInterpreterType.OPTIMIZED_LIME, model_3, example)
explanation.show_in_notebook(show_table=True, show_all=True)
print(generate_single_instance_explanation(LocalInterpreterType.OPTIMIZED_LIME, model_3, example))
# SHAP: SHAP explanation of the same example under model_3 — display plus
# textual summary.
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_3, example)
display(explanation)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_3, example))
13-May-23 18:38:08 - Generating a single instance explanation using OPTIMIZED_LIME for Model 3 ... 13-May-23 18:38:08 - Initializing LIME - generating new explainer for example 7466 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 19:12:26 - The optimal kernel width for example 7466 and Model 3 is 1.1193877551020406. Variables Stability Index (VSI): 91.56 Coefficients Stability Index (CSI): 100.0
13-May-23 19:13:03 - Generating a single instance explanation using SHAP for Model 3 ...
The prediction probability of Model 3's decision for this example is 0.6. LIME's explanation: The most impactful feature for Model 3's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1395. The 2nd most impactful feature for Model 3's positive (1) prediction probability is relationship= Husband with value of 0.0628. The 3rd most impactful feature for Model 3's positive (1) prediction probability is 10.00 < education-num <= 13.00 with value of 0.048. The 4th most impactful feature for Model 3's positive (1) prediction probability is education= Bachelors with value of 0.0358. The 5th most impactful feature for Model 3's positive (1) prediction probability is occupation= Tech-support with value of 0.0307. The 6th most impactful feature for Model 3's positive (1) prediction probability is gender= Male with value of 0.0153. The 7th most impactful feature for Model 3's positive (1) prediction probability is ethnicity= White with value of 0.0112. The most impactful feature for Model 3's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.4589. The 2nd most impactful feature for Model 3's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.092. The 3rd most impactful feature for Model 3's negative (0) prediction probability is age <= 28.00 with value of -0.088. The 4th most impactful feature for Model 3's negative (0) prediction probability is hours-per-week <= 40.00 with value of -0.0749. The 5th most impactful feature for Model 3's negative (0) prediction probability is workclass= Private with value of -0.0117.
The prediction probability of Model 3's decision for this example is 0.6. SHAP's explanation: The most impactful feature for Model 3's positive (1) prediction probability is age = 28 (-0.8082945381535962) with value of 0.1224. The 2nd most impactful feature for Model 3's positive (1) prediction probability is hours-per-week = 40 (-0.0792926836952247) with value of 0.0084. The most impactful feature for Model 3's negative (0) prediction probability is marital-status_ Married-civ-spouse = 1 with value of -0.2042. The 2nd most impactful feature for Model 3's negative (0) prediction probability is relationship_ Husband = 1 with value of -0.1035. The 3rd most impactful feature for Model 3's negative (0) prediction probability is education-num = 13 (1.125092240761039) with value of -0.0787. The 4th most impactful feature for Model 3's negative (0) prediction probability is occupation_ Tech-support = 1 with value of -0.0461. The 5th most impactful feature for Model 3's negative (0) prediction probability is education_ Bachelors = 1 with value of -0.0198. The 6th most impactful feature for Model 3's negative (0) prediction probability is capital-gain = 0 (-0.14721010722130554) with value of -0.002.
# LIME: local surrogate explanation (optimized kernel width) of this example
# under model_4 — widget plus textual summary.
explanation = explain_single_instance(LocalInterpreterType.OPTIMIZED_LIME, model_4, example)
explanation.show_in_notebook(show_table=True, show_all=True)
print(generate_single_instance_explanation(LocalInterpreterType.OPTIMIZED_LIME, model_4, example))
# SHAP: SHAP explanation of the same example under model_4 — display plus
# textual summary.
explanation = explain_single_instance(LocalInterpreterType.SHAP, model_4, example)
display(explanation)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_4, example))
13-May-23 19:13:09 - Generating a single instance explanation using OPTIMIZED_LIME for Model 4 ... 13-May-23 19:13:09 - Initializing LIME - generating new explainer for example 7466 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 19:41:19 - The optimal kernel width for example 7466 and Model 4 is 2.1091836734693876. Variables Stability Index (VSI): 93.39 Coefficients Stability Index (CSI): 89.78
13-May-23 19:41:50 - Generating a single instance explanation using SHAP for Model 4 ...
The prediction probability of Model 4's decision for this example is 0.52. LIME's explanation: The most impactful feature for Model 4's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.0722. The 2nd most impactful feature for Model 4's positive (1) prediction probability is occupation= Tech-support with value of 0.0621. The 3rd most impactful feature for Model 4's positive (1) prediction probability is 10.00 < education-num <= 13.00 with value of 0.0616. The 4th most impactful feature for Model 4's positive (1) prediction probability is relationship= Husband with value of 0.0389. The 5th most impactful feature for Model 4's positive (1) prediction probability is ethnicity= White with value of 0.0143. The 6th most impactful feature for Model 4's positive (1) prediction probability is education= Bachelors with value of 0.0125. The 7th most impactful feature for Model 4's positive (1) prediction probability is gender= Male with value of 0.009. The 8th most impactful feature for Model 4's positive (1) prediction probability is workclass= Private with value of 0.0009. The most impactful feature for Model 4's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.5529. The 2nd most impactful feature for Model 4's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.1326. The 3rd most impactful feature for Model 4's negative (0) prediction probability is age <= 28.00 with value of -0.062. The 4th most impactful feature for Model 4's negative (0) prediction probability is hours-per-week <= 40.00 with value of -0.0492.
The prediction probability of Model 4's decision for this example is 0.52. SHAP's explanation: The most impactful feature for Model 4's positive (1) prediction probability is marital-status_ Married-civ-spouse = 1 with value of 0.1738. The 2nd most impactful feature for Model 4's positive (1) prediction probability is occupation_ Tech-support = 1 with value of 0.1527. The 3rd most impactful feature for Model 4's positive (1) prediction probability is education-num = 13 (1.125092240761039) with value of 0.1313. The 4th most impactful feature for Model 4's positive (1) prediction probability is relationship_ Husband = 1 with value of 0.1135. The 5th most impactful feature for Model 4's positive (1) prediction probability is education_ Bachelors = 1 with value of 0.052. The most impactful feature for Model 4's negative (0) prediction probability is age = 28 (-0.8082945381535962) with value of -0.1552. The 2nd most impactful feature for Model 4's negative (0) prediction probability is capital-gain = 0 (-0.14721010722130554) with value of -0.0399. The 3rd most impactful feature for Model 4's negative (0) prediction probability is hours-per-week = 40 (-0.0792926836952247) with value of -0.0052.
# Pick the third sampled example and report its raw data plus a summary of
# which models classified it correctly vs. incorrectly.
example = examples[2]
example_summary = get_example_information(model_1, example)
print(example_summary)
model_comparison = generate_single_instance_comparison(models, example)
print(model_comparison)
Example 9211's data: age 44 workclass Private education Some-college education-num 10 marital-status Married-civ-spouse occupation Prof-specialty relationship Husband ethnicity White gender Male capital-gain 0 capital-loss 0 hours-per-week 45 Name: 18886, dtype: object Actual result for example 9211: >50K Example 9211 was truly classified by no model and falsely classified by Model 1, Model 2, Model 3, Model 4. For further clarification see the explanations below.
# Produce the local explanation for model_1 with both interpreters.
# LIME is rendered through its own notebook widget, SHAP through
# IPython's display; each is followed by the generated textual summary.
for interpreter_type in (LocalInterpreterType.OPTIMIZED_LIME, LocalInterpreterType.SHAP):
    explanation = explain_single_instance(interpreter_type, model_1, example)
    if interpreter_type == LocalInterpreterType.OPTIMIZED_LIME:
        explanation.show_in_notebook(show_table=True, show_all=True)
    else:
        display(explanation)
    print(generate_single_instance_explanation(interpreter_type, model_1, example))
13-May-23 19:41:55 - Generating a single instance explanation using OPTIMIZED_LIME for Model 1 ... 13-May-23 19:41:55 - Initializing LIME - generating new explainer for example 9211 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 19:49:22 - The optimal kernel width for example 9211 and Model 1 is 1.4561224489795916. Variables Stability Index (VSI): 90.67 Coefficients Stability Index (CSI): 100.0
13-May-23 19:49:30 - Generating a single instance explanation using SHAP for Model 1 ...
The prediction probability of Model 1's decision for this example is 0.53. LIME's explanation: The most impactful feature for Model 1's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1985. The 2nd most impactful feature for Model 1's positive (1) prediction probability is gender= Male with value of 0.0859. The 3rd most impactful feature for Model 1's positive (1) prediction probability is occupation= Prof-specialty with value of 0.0495. The 4th most impactful feature for Model 1's positive (1) prediction probability is ethnicity= White with value of 0.0287. The 5th most impactful feature for Model 1's positive (1) prediction probability is workclass= Private with value of 0.0161. The 6th most impactful feature for Model 1's positive (1) prediction probability is 40.00 < hours-per-week <= 45.00 with value of 0.009. The 7th most impactful feature for Model 1's positive (1) prediction probability is education= Some-college with value of 0.0082. The 8th most impactful feature for Model 1's positive (1) prediction probability is 37.00 < age <= 47.00 with value of 0.0059. The 9th most impactful feature for Model 1's positive (1) prediction probability is relationship= Husband with value of 0.0017. The most impactful feature for Model 1's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.6368. The 2nd most impactful feature for Model 1's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.1492. The 3rd most impactful feature for Model 1's negative (0) prediction probability is 9.00 < education-num <= 10.00 with value of -0.0015.
The prediction probability of Model 1's decision for this example is 0.53. SHAP's explanation: The most impactful feature for Model 1's positive (1) prediction probability is capital-gain = 0 (-0.14721010722130554) with value of 0.0588. The most impactful feature for Model 1's negative (0) prediction probability is marital-status_ Married-civ-spouse = 1 with value of -0.2465. The 2nd most impactful feature for Model 1's negative (0) prediction probability is occupation_ Prof-specialty = 1 with value of -0.1173. The 3rd most impactful feature for Model 1's negative (0) prediction probability is age = 44 (0.40612386152419017) with value of -0.0217. The 4th most impactful feature for Model 1's negative (0) prediction probability is hours-per-week = 45 (0.3347364227836373) with value of -0.0197. The 5th most impactful feature for Model 1's negative (0) prediction probability is education_ Some-college = 1 with value of -0.0089. The 6th most impactful feature for Model 1's negative (0) prediction probability is relationship_ Husband = 1 with value of -0.0026.
# LIME explanation for model_2: interactive notebook view, then text summary.
lime_explanation = explain_single_instance(LocalInterpreterType.OPTIMIZED_LIME, model_2, example)
lime_explanation.show_in_notebook(show_table=True, show_all=True)
print(generate_single_instance_explanation(LocalInterpreterType.OPTIMIZED_LIME, model_2, example))
# SHAP explanation for model_2: rendered plot, then text summary.
shap_explanation = explain_single_instance(LocalInterpreterType.SHAP, model_2, example)
display(shap_explanation)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_2, example))
13-May-23 19:49:30 - Generating a single instance explanation using OPTIMIZED_LIME for Model 2 ... 13-May-23 19:49:30 - Initializing LIME - generating new explainer for example 9211 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 19:57:07 - The optimal kernel width for example 9211 and Model 2 is 4.7214285714285715. Variables Stability Index (VSI): 95.23 Coefficients Stability Index (CSI): 80.0
13-May-23 19:57:15 - Generating a single instance explanation using SHAP for Model 2 ...
The prediction probability of Model 2's decision for this example is 0.62. LIME's explanation: The most impactful feature for Model 2's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1891. The 2nd most impactful feature for Model 2's positive (1) prediction probability is 40.00 < hours-per-week <= 45.00 with value of 0.0391. The 3rd most impactful feature for Model 2's positive (1) prediction probability is 37.00 < age <= 47.00 with value of 0.039. The 4th most impactful feature for Model 2's positive (1) prediction probability is occupation= Prof-specialty with value of 0.0212. The 5th most impactful feature for Model 2's positive (1) prediction probability is education= Some-college with value of 0.0161. The 6th most impactful feature for Model 2's positive (1) prediction probability is gender= Male with value of 0.0026. The most impactful feature for Model 2's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.5256. The 2nd most impactful feature for Model 2's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.0879. The 3rd most impactful feature for Model 2's negative (0) prediction probability is 9.00 < education-num <= 10.00 with value of -0.0134. The 4th most impactful feature for Model 2's negative (0) prediction probability is relationship= Husband with value of -0.0107. The 5th most impactful feature for Model 2's negative (0) prediction probability is workclass= Private with value of -0.0048. The 6th most impactful feature for Model 2's negative (0) prediction probability is ethnicity= White with value of -0.0001.
The prediction probability of Model 2's decision for this example is 0.62. SHAP's explanation: The most impactful feature for Model 2's negative (0) prediction probability is marital-status_ Married-civ-spouse = 1 with value of -0.2976.
# Explain model_3's decision on the current example with both local
# interpreters; LIME uses its notebook widget, SHAP uses IPython display.
for interpreter_type in (LocalInterpreterType.OPTIMIZED_LIME, LocalInterpreterType.SHAP):
    explanation = explain_single_instance(interpreter_type, model_3, example)
    if interpreter_type == LocalInterpreterType.OPTIMIZED_LIME:
        explanation.show_in_notebook(show_table=True, show_all=True)
    else:
        display(explanation)
    print(generate_single_instance_explanation(interpreter_type, model_3, example))
13-May-23 19:57:15 - Generating a single instance explanation using OPTIMIZED_LIME for Model 3 ... 13-May-23 19:57:15 - Initializing LIME - generating new explainer for example 9211 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 20:29:39 - The optimal kernel width for example 9211 and Model 3 is 1.1295918367346938. Variables Stability Index (VSI): 85.78 Coefficients Stability Index (CSI): 100.0
13-May-23 20:30:16 - Generating a single instance explanation using SHAP for Model 3 ...
The prediction probability of Model 3's decision for this example is 0.55. LIME's explanation: The most impactful feature for Model 3's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1458. The 2nd most impactful feature for Model 3's positive (1) prediction probability is relationship= Husband with value of 0.0681. The 3rd most impactful feature for Model 3's positive (1) prediction probability is occupation= Prof-specialty with value of 0.064. The 4th most impactful feature for Model 3's positive (1) prediction probability is 37.00 < age <= 47.00 with value of 0.0474. The 5th most impactful feature for Model 3's positive (1) prediction probability is 40.00 < hours-per-week <= 45.00 with value of 0.0364. The 6th most impactful feature for Model 3's positive (1) prediction probability is gender= Male with value of 0.0184. The 7th most impactful feature for Model 3's positive (1) prediction probability is ethnicity= White with value of 0.0136. The 8th most impactful feature for Model 3's positive (1) prediction probability is education= Some-college with value of 0.0057. The most impactful feature for Model 3's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.4584. The 2nd most impactful feature for Model 3's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.0961. The 3rd most impactful feature for Model 3's negative (0) prediction probability is workclass= Private with value of -0.0072. The 4th most impactful feature for Model 3's negative (0) prediction probability is 9.00 < education-num <= 10.00 with value of -0.0035.
The prediction probability of Model 3's decision for this example is 0.55. SHAP's explanation: The most impactful feature for Model 3's negative (0) prediction probability is marital-status_ Married-civ-spouse = 1 with value of -0.1974. The 2nd most impactful feature for Model 3's negative (0) prediction probability is relationship_ Husband = 1 with value of -0.0942. The 3rd most impactful feature for Model 3's negative (0) prediction probability is occupation_ Prof-specialty = 1 with value of -0.0568. The 4th most impactful feature for Model 3's negative (0) prediction probability is hours-per-week = 45 (0.3347364227836373) with value of -0.0161. The 5th most impactful feature for Model 3's negative (0) prediction probability is capital-gain = 0 (-0.14721010722130554) with value of -0.0033. The 6th most impactful feature for Model 3's negative (0) prediction probability is age = 44 (0.40612386152419017) with value of -0.002. The 7th most impactful feature for Model 3's negative (0) prediction probability is education_ Some-college = 1 with value of -0.0019.
# LIME explanation for model_4: interactive notebook view, then text summary.
lime_explanation = explain_single_instance(LocalInterpreterType.OPTIMIZED_LIME, model_4, example)
lime_explanation.show_in_notebook(show_table=True, show_all=True)
print(generate_single_instance_explanation(LocalInterpreterType.OPTIMIZED_LIME, model_4, example))
# SHAP explanation for model_4: rendered plot, then text summary.
shap_explanation = explain_single_instance(LocalInterpreterType.SHAP, model_4, example)
display(shap_explanation)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_4, example))
13-May-23 20:30:21 - Generating a single instance explanation using OPTIMIZED_LIME for Model 4 ... 13-May-23 20:30:21 - Initializing LIME - generating new explainer for example 9211 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 20:58:32 - The optimal kernel width for example 9211 and Model 4 is 1.6193877551020406. Variables Stability Index (VSI): 90.74 Coefficients Stability Index (CSI): 92.89
13-May-23 20:59:03 - Generating a single instance explanation using SHAP for Model 4 ...
The prediction probability of Model 4's decision for this example is 0.54. LIME's explanation: The most impactful feature for Model 4's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.074. The 2nd most impactful feature for Model 4's positive (1) prediction probability is occupation= Prof-specialty with value of 0.0538. The 3rd most impactful feature for Model 4's positive (1) prediction probability is relationship= Husband with value of 0.0411. The 4th most impactful feature for Model 4's positive (1) prediction probability is 37.00 < age <= 47.00 with value of 0.0345. The 5th most impactful feature for Model 4's positive (1) prediction probability is 40.00 < hours-per-week <= 45.00 with value of 0.0171. The 6th most impactful feature for Model 4's positive (1) prediction probability is ethnicity= White with value of 0.0147. The 7th most impactful feature for Model 4's positive (1) prediction probability is gender= Male with value of 0.008. The 8th most impactful feature for Model 4's positive (1) prediction probability is workclass= Private with value of 0.0022. The most impactful feature for Model 4's negative (0) prediction probability is capital-gain <= 0.00 with value of -0.5573. The 2nd most impactful feature for Model 4's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.1505. The 3rd most impactful feature for Model 4's negative (0) prediction probability is 9.00 < education-num <= 10.00 with value of -0.0081. The 4th most impactful feature for Model 4's negative (0) prediction probability is education= Some-college with value of -0.0007.
The prediction probability of Model 4's decision for this example is 0.54. SHAP's explanation: The most impactful feature for Model 4's positive (1) prediction probability is capital-gain = 0 (-0.14721010722130554) with value of 0.057. The 2nd most impactful feature for Model 4's positive (1) prediction probability is education_ Some-college = 1 with value of 0.0072. The most impactful feature for Model 4's negative (0) prediction probability is occupation_ Prof-specialty = 1 with value of -0.1578. The 2nd most impactful feature for Model 4's negative (0) prediction probability is marital-status_ Married-civ-spouse = 1 with value of -0.1475. The 3rd most impactful feature for Model 4's negative (0) prediction probability is relationship_ Husband = 1 with value of -0.0787. The 4th most impactful feature for Model 4's negative (0) prediction probability is age = 44 (0.40612386152419017) with value of -0.0337. The 5th most impactful feature for Model 4's negative (0) prediction probability is hours-per-week = 45 (0.3347364227836373) with value of -0.0141.
# Move on to the fourth sampled example: show its raw data and which models
# got its label right vs. wrong.
example = examples[3]
example_summary = get_example_information(model_1, example)
print(example_summary)
model_comparison = generate_single_instance_comparison(models, example)
print(model_comparison)
Example 8826's data: age 29 workclass Private education Some-college education-num 10 marital-status Married-civ-spouse occupation Exec-managerial relationship Husband ethnicity White gender Male capital-gain 3411 capital-loss 0 hours-per-week 70 Name: 30001, dtype: object Actual result for example 8826: <=50K Example 8826 was truly classified by Model 2, Model 3 and falsely classified by Model 1, Model 4. For further clarification see the explanations below.
# Explain model_1's decision on the current example with both local
# interpreters; LIME uses its notebook widget, SHAP uses IPython display.
for interpreter_type in (LocalInterpreterType.OPTIMIZED_LIME, LocalInterpreterType.SHAP):
    explanation = explain_single_instance(interpreter_type, model_1, example)
    if interpreter_type == LocalInterpreterType.OPTIMIZED_LIME:
        explanation.show_in_notebook(show_table=True, show_all=True)
    else:
        display(explanation)
    print(generate_single_instance_explanation(interpreter_type, model_1, example))
13-May-23 20:59:09 - Generating a single instance explanation using OPTIMIZED_LIME for Model 1 ... 13-May-23 20:59:09 - Initializing LIME - generating new explainer for example 8826 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 21:06:05 - The optimal kernel width for example 8826 and Model 1 is 1.2295918367346939. Variables Stability Index (VSI): 92.0 Coefficients Stability Index (CSI): 100.0
13-May-23 21:06:13 - Generating a single instance explanation using SHAP for Model 1 ...
The prediction probability of Model 1's decision for this example is 0.83. LIME's explanation: The most impactful feature for Model 1's positive (1) prediction probability is capital-gain > 0.00 with value of 0.6295. The 2nd most impactful feature for Model 1's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.199. The 3rd most impactful feature for Model 1's positive (1) prediction probability is occupation= Exec-managerial with value of 0.0963. The 4th most impactful feature for Model 1's positive (1) prediction probability is gender= Male with value of 0.0913. The 5th most impactful feature for Model 1's positive (1) prediction probability is hours-per-week > 45.00 with value of 0.0832. The 6th most impactful feature for Model 1's positive (1) prediction probability is ethnicity= White with value of 0.0321. The 7th most impactful feature for Model 1's positive (1) prediction probability is workclass= Private with value of 0.0136. The 8th most impactful feature for Model 1's positive (1) prediction probability is education= Some-college with value of 0.0061. The 9th most impactful feature for Model 1's positive (1) prediction probability is relationship= Husband with value of 0.0037. The most impactful feature for Model 1's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.1474. The 2nd most impactful feature for Model 1's negative (0) prediction probability is 28.00 < age <= 37.00 with value of -0.0262. The 3rd most impactful feature for Model 1's negative (0) prediction probability is 9.00 < education-num <= 10.00 with value of -0.0019.
The prediction probability of Model 1's decision for this example is 0.83. SHAP's explanation: The most impactful feature for Model 1's positive (1) prediction probability is marital-status_ Married-civ-spouse = 1 with value of 0.2719. The 2nd most impactful feature for Model 1's positive (1) prediction probability is occupation_ Exec-managerial = 1 with value of 0.1795. The 3rd most impactful feature for Model 1's positive (1) prediction probability is hours-per-week = 70 (2.4048819551779475) with value of 0.1523. The 4th most impactful feature for Model 1's positive (1) prediction probability is capital-gain = 3411 (0.31460287543050414) with value of 0.1414. The 5th most impactful feature for Model 1's positive (1) prediction probability is education_ Some-college = 1 with value of 0.0088. The 6th most impactful feature for Model 1's positive (1) prediction probability is relationship_ Husband = 1 with value of 0.0026. The most impactful feature for Model 1's negative (0) prediction probability is age = 29 (-0.7323933881737346) with value of -0.0419.
# LIME explanation for model_2: interactive notebook view, then text summary.
lime_explanation = explain_single_instance(LocalInterpreterType.OPTIMIZED_LIME, model_2, example)
lime_explanation.show_in_notebook(show_table=True, show_all=True)
print(generate_single_instance_explanation(LocalInterpreterType.OPTIMIZED_LIME, model_2, example))
# SHAP explanation for model_2: rendered plot, then text summary.
shap_explanation = explain_single_instance(LocalInterpreterType.SHAP, model_2, example)
display(shap_explanation)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_2, example))
13-May-23 21:06:13 - Generating a single instance explanation using OPTIMIZED_LIME for Model 2 ... 13-May-23 21:06:13 - Initializing LIME - generating new explainer for example 8826 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 21:13:51 - The optimal kernel width for example 8826 and Model 2 is 3.8112244897959178. Variables Stability Index (VSI): 91.51 Coefficients Stability Index (CSI): 83.56
13-May-23 21:13:59 - Generating a single instance explanation using SHAP for Model 2 ...
The prediction probability of Model 2's decision for this example is 0.7. LIME's explanation: The most impactful feature for Model 2's positive (1) prediction probability is capital-gain > 0.00 with value of 0.5255. The 2nd most impactful feature for Model 2's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1898. The 3rd most impactful feature for Model 2's positive (1) prediction probability is hours-per-week > 45.00 with value of 0.0639. The 4th most impactful feature for Model 2's positive (1) prediction probability is occupation= Exec-managerial with value of 0.0293. The 5th most impactful feature for Model 2's positive (1) prediction probability is education= Some-college with value of 0.0149. The 6th most impactful feature for Model 2's positive (1) prediction probability is gender= Male with value of 0.0029. The most impactful feature for Model 2's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.0857. The 2nd most impactful feature for Model 2's negative (0) prediction probability is 9.00 < education-num <= 10.00 with value of -0.0135. The 3rd most impactful feature for Model 2's negative (0) prediction probability is relationship= Husband with value of -0.0106. The 4th most impactful feature for Model 2's negative (0) prediction probability is workclass= Private with value of -0.0053. The 5th most impactful feature for Model 2's negative (0) prediction probability is 28.00 < age <= 37.00 with value of -0.0032. The 6th most impactful feature for Model 2's negative (0) prediction probability is ethnicity= White with value of -0.0008.
The prediction probability of Model 2's decision for this example is 0.7. SHAP's explanation: The most impactful feature for Model 2's positive (1) prediction probability is age = 29 (-0.7323933881737346) with value of 0.0999. The most impactful feature for Model 2's negative (0) prediction probability is marital-status_ Married-civ-spouse = 1 with value of -0.288. The 2nd most impactful feature for Model 2's negative (0) prediction probability is occupation_ Exec-managerial = 1 with value of -0.0301.
# Explain model_3's decision on the current example with both local
# interpreters; LIME uses its notebook widget, SHAP uses IPython display.
for interpreter_type in (LocalInterpreterType.OPTIMIZED_LIME, LocalInterpreterType.SHAP):
    explanation = explain_single_instance(interpreter_type, model_3, example)
    if interpreter_type == LocalInterpreterType.OPTIMIZED_LIME:
        explanation.show_in_notebook(show_table=True, show_all=True)
    else:
        display(explanation)
    print(generate_single_instance_explanation(interpreter_type, model_3, example))
13-May-23 21:13:59 - Generating a single instance explanation using OPTIMIZED_LIME for Model 3 ... 13-May-23 21:13:59 - Initializing LIME - generating new explainer for example 8826 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 21:48:31 - The optimal kernel width for example 8826 and Model 3 is 2.2357142857142853. Variables Stability Index (VSI): 94.67 Coefficients Stability Index (CSI): 100.0
13-May-23 21:49:08 - Generating a single instance explanation using SHAP for Model 3 ...
The prediction probability of Model 3's decision for this example is 0.67. LIME's explanation: The most impactful feature for Model 3's positive (1) prediction probability is capital-gain > 0.00 with value of 0.4617. The 2nd most impactful feature for Model 3's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.1407. The 3rd most impactful feature for Model 3's positive (1) prediction probability is occupation= Exec-managerial with value of 0.0713. The 4th most impactful feature for Model 3's positive (1) prediction probability is hours-per-week > 45.00 with value of 0.0661. The 5th most impactful feature for Model 3's positive (1) prediction probability is relationship= Husband with value of 0.0654. The 6th most impactful feature for Model 3's positive (1) prediction probability is gender= Male with value of 0.0179. The 7th most impactful feature for Model 3's positive (1) prediction probability is ethnicity= White with value of 0.0129. The 8th most impactful feature for Model 3's positive (1) prediction probability is education= Some-college with value of 0.0008. The most impactful feature for Model 3's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.0841. The 2nd most impactful feature for Model 3's negative (0) prediction probability is workclass= Private with value of -0.0105. The 3rd most impactful feature for Model 3's negative (0) prediction probability is 28.00 < age <= 37.00 with value of -0.0079. The 4th most impactful feature for Model 3's negative (0) prediction probability is 9.00 < education-num <= 10.00 with value of -0.0039.
The prediction probability of Model 3's decision for this example is 0.67. SHAP's explanation: The most impactful feature for Model 3's positive (1) prediction probability is age = 29 (-0.7323933881737346) with value of 0.0971. The 2nd most impactful feature for Model 3's positive (1) prediction probability is capital-gain = 3411 (0.31460287543050414) with value of 0.0253. The most impactful feature for Model 3's negative (0) prediction probability is marital-status_ Married-civ-spouse = 1 with value of -0.166. The 2nd most impactful feature for Model 3's negative (0) prediction probability is occupation_ Exec-managerial = 1 with value of -0.1036. The 3rd most impactful feature for Model 3's negative (0) prediction probability is relationship_ Husband = 1 with value of -0.0523. The 4th most impactful feature for Model 3's negative (0) prediction probability is hours-per-week = 70 (2.4048819551779475) with value of -0.0493. The 5th most impactful feature for Model 3's negative (0) prediction probability is education_ Some-college = 1 with value of -0.0036.
# LIME explanation for model_4: interactive notebook view, then text summary.
lime_explanation = explain_single_instance(LocalInterpreterType.OPTIMIZED_LIME, model_4, example)
lime_explanation.show_in_notebook(show_table=True, show_all=True)
print(generate_single_instance_explanation(LocalInterpreterType.OPTIMIZED_LIME, model_4, example))
# SHAP explanation for model_4: rendered plot, then text summary.
shap_explanation = explain_single_instance(LocalInterpreterType.SHAP, model_4, example)
display(shap_explanation)
print(generate_single_instance_explanation(LocalInterpreterType.SHAP, model_4, example))
13-May-23 21:49:14 - Generating a single instance explanation using OPTIMIZED_LIME for Model 4 ... 13-May-23 21:49:14 - Initializing LIME - generating new explainer for example 8826 and optimizing the kernel width. This operation may be time-consuming so please be patient. 13-May-23 22:17:25 - The optimal kernel width for example 8826 and Model 4 is 5.047959183673469. Variables Stability Index (VSI): 84.0 Coefficients Stability Index (CSI): 100.0
13-May-23 22:17:56 - Generating a single instance explanation using SHAP for Model 4 ...
The prediction probability of Model 4's decision for this example is 0.52. LIME's explanation: The most impactful feature for Model 4's positive (1) prediction probability is capital-gain > 0.00 with value of 0.5517. The 2nd most impactful feature for Model 4's positive (1) prediction probability is marital-status= Married-civ-spouse with value of 0.071. The 3rd most impactful feature for Model 4's positive (1) prediction probability is occupation= Exec-managerial with value of 0.0496. The 4th most impactful feature for Model 4's positive (1) prediction probability is hours-per-week > 45.00 with value of 0.048. The 5th most impactful feature for Model 4's positive (1) prediction probability is relationship= Husband with value of 0.0393. The 6th most impactful feature for Model 4's positive (1) prediction probability is ethnicity= White with value of 0.014. The 7th most impactful feature for Model 4's positive (1) prediction probability is gender= Male with value of 0.0087. The 8th most impactful feature for Model 4's positive (1) prediction probability is workclass= Private with value of 0.0012. The most impactful feature for Model 4's negative (0) prediction probability is capital-loss <= 0.00 with value of -0.1264. The 2nd most impactful feature for Model 4's negative (0) prediction probability is 28.00 < age <= 37.00 with value of -0.0144. The 3rd most impactful feature for Model 4's negative (0) prediction probability is 9.00 < education-num <= 10.00 with value of -0.008. The 4th most impactful feature for Model 4's negative (0) prediction probability is education= Some-college with value of -0.0016.
The prediction probability of Model 4's decision for this example is 0.52. SHAP's explanation: The most impactful feature for Model 4's positive (1) prediction probability is marital-status_ Married-civ-spouse = 1 with value of 0.1618. The 2nd most impactful feature for Model 4's positive (1) prediction probability is occupation_ Exec-managerial = 1 with value of 0.1504. The 3rd most impactful feature for Model 4's positive (1) prediction probability is capital-gain = 3411 (0.31460287543050414) with value of 0.1403. The 4th most impactful feature for Model 4's positive (1) prediction probability is relationship_ Husband = 1 with value of 0.083. The 5th most impactful feature for Model 4's positive (1) prediction probability is hours-per-week = 70 (2.4048819551779475) with value of 0.0415. The 6th most impactful feature for Model 4's positive (1) prediction probability is education_ Some-college = 1 with value of 0.0018. The most impactful feature for Model 4's negative (0) prediction probability is age = 29 (-0.7323933881737346) with value of -0.1512.